Setup

Packages Used

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(knitr)
if (!require("gganimate")) install.packages("gganimate")
## Loading required package: gganimate
library(gganimate)
if (!require("transformr")) install.packages("transformr")
## Loading required package: transformr
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")

## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")

## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")

## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")

## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
library(transformr)
if (!require("plotly")) install.packages("plotly")
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(plotly)
if (!require("kableExtra")) install.packages("kableExtra")
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(kableExtra)

Datasets Used

bills <- read_csv("../dataraw/billionaires_2021_10_31.csv")
## Rows: 500 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Name, Total_Net_Worth, LastChange, YTDChange, Country, Industry
## dbl (4): Rank, Total_Net_Worth_Bil, LastChange_Bil, YTDChange_Bil
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(bills)
## Rows: 500
## Columns: 10
## $ Rank                <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Name                <chr> "Elon Musk", "Jeff Bezos", "Bernard Arnault", "Bil…
## $ Total_Net_Worth     <chr> "$311B", "$195B", "$167B", "$136B", "$131B", "$126…
## $ Total_Net_Worth_Bil <dbl> 311.0, 195.0, 167.0, 136.0, 131.0, 126.0, 121.0, 1…
## $ LastChange          <chr> "+$9.32B", "-$3.79B", "-$544M", "+$906M", "+$1.71B…
## $ LastChange_Bil      <dbl> 9.3200, -3.7900, -0.5440, 0.9060, 1.7100, 1.6400, …
## $ YTDChange           <chr> "+$141B", "+$5.06B", "+$52.7B", "+$4.40B", "+$48.7…
## $ YTDChange_Bil       <dbl> 141.00, 5.06, 52.70, 4.40, 48.70, 46.70, 17.30, 37…
## $ Country             <chr> "UnitedStates", "UnitedStates", "France", "UnitedS…
## $ Industry            <chr> "Technology", "Technology", "Consumer", "Technolog…
is_tibble(bills)
## [1] TRUE
ipeds <- read_csv("https://assets.datacamp.com/production/repositories/1942/datasets/18a000cf70d2fe999c6a6f2b28a7dc9813730e74/ipeds.csv")
## Rows: 3097 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, sector_label
## dbl (2): lat, lng
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(ipeds)
## Rows: 3,097
## Columns: 4
## $ name         <chr> "A T Still University of Health Sciences", "Abilene Chris…
## $ lat          <dbl> 40.19365, 32.46915, 31.48189, 34.06099, 44.85572, 29.6500…
## $ lng          <dbl> -92.58918, -99.70954, -83.52828, -118.30118, -93.29981, -…
## $ sector_label <chr> "Private", "Private", "Public", "For-Profit", "For-Profit…
teenNC <- readRDS("../dataprocessed/CDCteenbirthrateNC.rds") %>% 
  mutate(subregion = tolower(county), # to match county map data
         year = as.integer(year)) # to make the animation labels whole numbers
glimpse(teenNC)
## Rows: 1,600
## Columns: 8
## $ year       <int> 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,…
## $ county     <chr> "Alamance", "Alamance", "Alamance", "Alamance", "Alamance",…
## $ countyFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,…
## $ FIPS       <dbl> 37001, 37001, 37001, 37001, 37001, 37001, 37001, 37001, 370…
## $ birth_rate <dbl> 46.2, 46.0, 45.4, 47.0, 47.1, 45.2, 41.7, 36.8, 33.2, 30.8,…
## $ LCL        <dbl> 42.2, 43.0, 42.9, 44.5, 44.7, 42.8, 39.5, 34.8, 31.3, 28.9,…
## $ UCL        <dbl> 50.5, 49.1, 48.0, 49.5, 49.7, 47.7, 44.1, 38.9, 35.2, 32.7,…
## $ subregion  <chr> "alamance", "alamance", "alamance", "alamance", "alamance",…
### County Data from Maps Package
countymapNC <- map_data("county") %>% 
  filter(region == "north carolina") %>% 
  select(-region) # not needed since it will be all NC data now
glimpse(countymapNC)
## Rows: 3,669
## Columns: 5
## $ long      <dbl> -79.53800, -79.54372, -79.53800, -79.52081, -79.26298, -79.2…
## $ lat       <dbl> 35.84424, 35.89008, 35.98175, 36.23385, 36.23385, 35.90726, …
## $ group     <dbl> 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, …
## $ order     <int> 54915, 54916, 54917, 54918, 54919, 54920, 54921, 54922, 5492…
## $ subregion <chr> "alamance", "alamance", "alamance", "alamance", "alamance", …

bills_ex <- bills %>%
  group_by(Industry) %>%
  summarise(Sum = sum(Total_Net_Worth_Bil))

bills_red <- bills %>%
  mutate(
    Industry = ifelse(
        bills$Industry == "Technology" |
        bills$Industry == "Industrial" |
        bills$Industry == "Finance" |
        bills$Industry == "Diversified"|
        bills$Industry == "Consumer",
      Industry,
      "Other"),
    LastChange_Prop = (LastChange_Bil + Total_Net_Worth_Bil) / Total_Net_Worth_Bil,
    LastChange_Coef = (LastChange_Bil + mean(Total_Net_Worth_Bil)) / mean(Total_Net_Worth_Bil)
  ) %>%
  rename(`Net Worth (Billions)` = Total_Net_Worth_Bil,
         `Change YTD` =  YTDChange_Bil)

bills_rex <- bills_red %>%
  group_by(Industry) %>%
  summarise(Sum = sum(`Net Worth (Billions)`))
jitter <- position_jitter(width = 0.2, height = 0, seed = 5)

p <- ggplot(bills_red,
       aes(
         x = 1,
         y = `Change YTD`,
         color = Industry,
         size = `Net Worth (Billions)`,
         text = Name
       )) +
  geom_point(position = jitter) +
  geom_point(shape = 1, colour = "gray60", alpha = .2, position = jitter)+ 
  geom_hline(yintercept=0) +
  scale_color_manual(
      breaks = c("Technology", "Industrial", "Finance", "Diversified", "Consumer", "Other"),
      values = c("#AA4499", "#882255", "#117733", "#DDCC77", "#CC6677", "#88CCEE")) +
  scale_y_continuous(n.breaks = 18, minor_breaks = seq(-20, 20, 2)) +
  scale_x_continuous(breaks = NULL, minor_breaks = NULL) +
  scale_size(range = c(0, 20)) +
  labs(
      title = "Year-to-Date Change in Net Worth of 500 Wealthiest Individuals",
      subtitle = "as of October 31st, 2021",
      x = NULL,
      y = "Change in Fortune, Billions of Dollars"
   ) + 
  theme(axis.ticks.x = element_blank(),
        axis.text.x = element_blank())

 ggplotly(p, tooltip = c("text", "size", "y", "color"))

This plot shows which billionaires have been making (and losing) the most money so far this year, color-coded by sector of the economy. The first thing which jumps out is how not only does the tech sector make up an outsized proportion of the wealth of the world’s top billionaires, it is also includes nearly all of the top 10 wealthiest individuals. By contrast, the catch-all category “other” includes more total wealth, but is made up of numerous smaller fortunes. It’s also immediately apparent that the individuals making the most money this year are mostly the ones who already have the most money, and again, most of them are in the tech sector. Finally, by hovering over each balloon, it is possible to see the name of the person. By this, we can see just how rapidly Elon Musk’s fortune is ballooning this year, vastly outstripping all of his closest competitors.

mapdata <- countymapNC %>% 
  full_join(teenNC, by = "subregion")
schoolsNC <- c(
  "Appalachian State University",
  "East Carolina University",
  "Elizabeth City State University",
  "Fayetteville State University",
  "North Carolina A & T State University",
  "North Carolina Central University",
  "North Carolina State University at Raleigh",
  "University of North Carolina at Asheville",
  "University of North Carolina at Chapel Hill",
  "University of North Carolina at Charlotte",
  "University of North Carolina at Greensboro",
  "University of North Carolina at Pembroke",
  "University of North Carolina Wilmington",
  "University of North Carolina School of the Arts",
  "Western Carolina University",
  "Winston-Salem State University"
)

ipedsNC <- ipeds %>%
  filter (name %in% schoolsNC)

print(ipedsNC)
## # A tibble: 16 × 4
##    name                                              lat   lng sector_label
##    <chr>                                           <dbl> <dbl> <chr>       
##  1 Appalachian State University                     36.2 -81.7 Public      
##  2 East Carolina University                         35.6 -77.4 Public      
##  3 Elizabeth City State University                  36.3 -76.2 Public      
##  4 Fayetteville State University                    35.1 -78.9 Public      
##  5 North Carolina A & T State University            36.1 -79.8 Public      
##  6 North Carolina Central University                36.0 -78.9 Public      
##  7 North Carolina State University at Raleigh       35.8 -78.7 Public      
##  8 University of North Carolina at Asheville        35.6 -82.6 Public      
##  9 University of North Carolina at Chapel Hill      35.9 -79.1 Public      
## 10 University of North Carolina at Charlotte        35.3 -80.7 Public      
## 11 University of North Carolina at Greensboro       36.1 -79.8 Public      
## 12 University of North Carolina at Pembroke         34.7 -79.2 Public      
## 13 University of North Carolina School of the Arts  36.1 -80.2 Public      
## 14 University of North Carolina Wilmington          34.2 -77.9 Public      
## 15 Western Carolina University                      35.3 -83.2 Public      
## 16 Winston-Salem State University                   36.1 -80.2 Public
teen_anim <- mapdata %>%
   ggplot() +
   geom_polygon(aes(x = long, 
                    y = lat, 
                    group = group, 
                    fill = birth_rate),
                color = "black") +
  coord_map() +
  scale_fill_gradient2(low = "white", mid = "lavender", high = "red", midpoint = 30) +

  transition_states(year, transition_length = 0, state_length = 2, wrap = TRUE) +           #
  geom_point(data = ipedsNC, 
             aes(lng, lat, color = "Location of UNC Campuses"),
             size = 1.5) +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5)) +
    scale_colour_manual(values = "black", na.value = "black") +
    labs(title = "Change in Teen Birth Rates in North Carolina",
         subtitle = "Year: {closest_state} , per 1,000 females in age group 15–19 years",     #
         fill = "Teen Birth Rate",
         color = NULL,
         caption = "Data Source: Centers for Disease Control and Prevention") +
  no_axes_theme

animate(teen_anim, duration = 60, end_pause = 20, start_pause = 8)

I was curious if the location of universities was correlated with birth rates in people under 20 years old. Of course, the locations of universities are themselves correlated with urban centers and higher income areas, but there does appear to be a loose correlation. I also wanted to show the change in teen birth rates over time.

Similar to national and global trends, teen birth rates have been generally decreasing across the state over the past two decades. The map shows that areas with the highest rates in 2003 have the most dramatic reductions, but even areas with relatively low rates have seen decreases. Only one county in North Carolina has seen a net increase in the teen birth rate.

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4 plotly_4.10.0    transformr_0.1.3 gganimate_1.0.7 
##  [5] knitr_1.33       maps_3.4.0       forcats_0.5.1    stringr_1.4.0   
##  [9] dplyr_1.0.7      purrr_0.3.4      readr_2.1.1      tidyr_1.1.3     
## [13] tibble_3.1.6     ggplot2_3.3.5    tidyverse_1.3.1 
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.5.1           sf_0.8-0           lubridate_1.8.0    bit64_4.0.5       
##  [5] webshot_0.5.2      progress_1.2.2     httr_1.4.2         tools_3.6.0       
##  [9] backports_1.4.0    bslib_0.3.1        utf8_1.2.2         R6_2.5.1          
## [13] KernSmooth_2.23-20 DBI_1.1.1          lazyeval_0.2.2     colorspace_2.0-2  
## [17] withr_2.4.3        tidyselect_1.1.1   prettyunits_1.1.1  curl_4.3.2        
## [21] bit_4.0.4          compiler_3.6.0     cli_3.1.0          rvest_1.0.2       
## [25] xml2_1.3.3         labeling_0.4.2     sass_0.4.0         scales_1.1.1      
## [29] classInt_0.4-3     proxy_0.4-26       systemfonts_1.0.3  digest_0.6.29     
## [33] rmarkdown_2.11     svglite_2.0.0      pkgconfig_2.0.3    htmltools_0.5.2   
## [37] highr_0.9          dbplyr_2.1.1       fastmap_1.1.0      htmlwidgets_1.5.4 
## [41] rlang_0.4.12       readxl_1.3.1       rstudioapi_0.13    jquerylib_0.1.4   
## [45] farver_2.1.0       generics_0.1.1     jsonlite_1.7.2     crosstalk_1.2.0   
## [49] vroom_1.5.7        magrittr_2.0.1     Rcpp_1.0.7         munsell_0.5.0     
## [53] fansi_0.5.0        lifecycle_1.0.1    stringi_1.7.6      yaml_2.2.1        
## [57] plyr_1.8.6         grid_3.6.0         parallel_3.6.0     crayon_1.4.2      
## [61] haven_2.4.3        mapproj_1.2.7      hms_1.1.1          magick_2.7.3      
## [65] pillar_1.6.4       lpSolve_5.6.15     reprex_2.0.1       glue_1.5.1        
## [69] evaluate_0.14      data.table_1.14.2  modelr_0.1.8       vctrs_0.3.8       
## [73] tzdb_0.2.0         tweenr_1.0.2       cellranger_1.1.0   gtable_0.3.0      
## [77] assertthat_0.2.1   xfun_0.28          broom_0.7.10       e1071_1.7-9       
## [81] class_7.3-19       viridisLite_0.4.0  units_0.7-2        ellipsis_0.3.2